<?php
/**
 * Created by PhpStorm.
 * User: windows10
 * Date: 2020/4/1
 * Time: 16:16
 */

require '../../vendor/autoload.php';
use phpspider\core\phpspider;

/* Do NOT delete this comment */
/* 不要删除这段注释 */
// Crawler configuration handed to phpspider below. The meaning of each key is
// defined by the phpspider library; the notes here only describe what this
// file supplies. NOTE(review): confirm key semantics against the phpspider docs.
$configs = array(
    // Display name of this spider (runtime string, intentionally left as-is).
    'name' => '糗事百科',
    // Host names listed for this crawl.
    'domains' => array(
        'qiushibaike.com',
        'www.qiushibaike.com',
    ),
    // Entry URL(s) for the crawl.
    'scan_urls' => array(
        'http://www.qiushibaike.com/',
    ),
    // Pattern for article (content) page URLs. Single-quoted so that the
    // backslash sequences are passed through verbatim to the regex engine.
    'content_url_regexes' => array(
        'http://www.qiushibaike.com/article/\d+',
    ),
    // Pattern for paginated list page URLs.
    'list_url_regexes' => array(
        'http://www.qiushibaike.com/8hr/page/\d+\?s=\d+',
    ),
    // Fields to extract; each entry has a name, a selector and a required flag.
    'fields' => array(
        array(
            // Extract the article body from the content page.
            'name' => 'article_content',
            'selector' => "//*[@id='single-next-link']",
            'required' => true,
        ),
        array(
            // Extract the article author from the content page.
            'name' => 'article_author',
            'selector' => "//div[contains(@class,'author')]//h2",
            'required' => true,
        ),
    ),
    // A commented-out 'db_config' stub (host/port/user/pass/name) used to live
    // here. It was removed because it embedded a plaintext database password;
    // if database settings are needed, load the credentials from the
    // environment instead of committing them to source control.

    // Must be the boolean literal `true`: a misspelled bare word is parsed as
    // an undefined constant, which is a fatal Error on PHP 8.
    'log_show' => true,
    // Log file path, relative to the current working directory.
    'log_file' => './data/logs/qiushibaike.log',
    // Export settings: type 'csv', written to a file under the ./data directory.
    'export' => array(
        'type' => 'csv',
        'file' => './data/qiushibaike.csv',
    ),
);

// Build the spider from the configuration above and start it.
$spider = new phpspider($configs);
$spider->start();